Grabbing SPINS gradients

## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: prettyGraphs
## 
## Loading required package: ExPosition
## 
## 
## Attaching package: 'plotly'
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## 
## 
## Attaching package: 'colorspace'
## 
## 
## The following object is masked from 'package:PTCA4CATA':
## 
##     lighten

Read in the SPINS big table

## New names:
## Rows: 164640 Columns: 8
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): ROI, Network, Subject, Site dbl (4): ...1, grad1, grad2, grad3
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`

read subject data

##  [1] "record_id"                        "scanner"                         
##  [3] "diagnostic_group"                 "demo_sex"                        
##  [5] "demo_age_study_entry"             "scog_rmet_total"                 
##  [7] "scog_er40_total"                  "scog_tasit1_total"               
##  [9] "scog_tasit2_sinc"                 "scog_tasit2_simpsar"             
## [11] "scog_tasit2_parsar"               "scog_tasit3_lie"                 
## [13] "scog_tasit3_sar"                  "np_domain_tscore_process_speed"  
## [15] "np_domain_tscore_att_vigilance"   "np_domain_tscore_work_mem"       
## [17] "np_domain_tscore_verbal_learning" "np_domain_tscore_visual_learning"
## [19] "np_domain_tscore_reasoning_ps"
## New names:
## Rows: 467 Columns: 43
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): record_id, scanner, diagnostic_group, demo_sex dbl (36): ...1,
## demo_age_study_entry, scog_rmet_total, scog_er40_total, scog... lgl (3):
## exclude_MRI, exclude_meanFD, exclude_earlyTerm
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`

Check subject overlap

grad.sub <- spins_grads_wide$Subject[order(spins_grads_wide$Subject)]
behav.sub <- lol_spins_behav$record_id[order(lol_spins_behav$record_id)]

# behav.sub[behav.sub %in% grad.sub == FALSE]
# grad.sub[grad.sub %in% behav.sub == FALSE]

# complete.cases(spins_grads_wide)
# complete.cases(lol_spins_behav)
kept.sub <- lol_spins_behav$record_id[complete.cases(lol_spins_behav)==TRUE] # 420

## grab the matching data

behav.dat <- lol_spins_behav[kept.sub,c(6:19)]
spins_grads_wide_org <- spins_grads_wide[,-1]
rownames(spins_grads_wide_org) <- spins_grads_wide$Subject
grad.dat <- spins_grads_wide_org[kept.sub,]

## variables to regress out
regout.dat <- var2regout_num[kept.sub,]

Demographics

behav_all <- lol_spins_behav[kept.sub,]

table_one <- CreateTableOne(vars = colnames(behav_all)[4:19], strata="diagnostic_group",data=behav_all)

lol_demo <- 
  read_csv('../data/spins_lolivers_subject_info_for_grads_2022-04-21(withcomposite).csv') %>%
  filter(exclude_MRI==FALSE, 
         exclude_meanFD==FALSE, 
         exclude_earlyTerm==FALSE) %>% as.data.frame
## New names:
## Rows: 467 Columns: 46
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): record_id, scanner, diagnostic_group, demo_sex dbl (39): ...1,
## demo_age_study_entry, scog_rmet_total, scog_er40_total, scog... lgl (3):
## exclude_MRI, exclude_meanFD, exclude_earlyTerm
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
lol_demo$subject <- sub("SPN01_", "sub-", lol_demo$record_id) %>% sub("_", "", .)
rownames(lol_demo) <- lol_demo$record_id
lol_demo_match <- lol_demo[kept.sub,]

spins_demo <- lol_demo_match %>% 
  select(demo_sex, demo_age_study_entry, diagnostic_group, scog_rmet_total, scog_er40_total, #scog_mean_ea,
         scog_tasit1_total,
         scog_tasit2_total, scog_tasit3_total,np_composite_tscore, np_domain_tscore_att_vigilance,
         np_domain_tscore_process_speed, np_domain_tscore_work_mem,
         np_domain_tscore_verbal_learning, np_domain_tscore_visual_learning,
         np_domain_tscore_reasoning_ps, 
         #bsfs_sec2_total, bsfs_sec3_total, bsfs_sec3_total, bsfs_sec4_total, bsfs_sec5_total, bsfs_sec6_total,
         #fd_mean_rest
  ) %>% data.frame
colnames(spins_demo)
##  [1] "demo_sex"                         "demo_age_study_entry"            
##  [3] "diagnostic_group"                 "scog_rmet_total"                 
##  [5] "scog_er40_total"                  "scog_tasit1_total"               
##  [7] "scog_tasit2_total"                "scog_tasit3_total"               
##  [9] "np_composite_tscore"              "np_domain_tscore_att_vigilance"  
## [11] "np_domain_tscore_process_speed"   "np_domain_tscore_work_mem"       
## [13] "np_domain_tscore_verbal_learning" "np_domain_tscore_visual_learning"
## [15] "np_domain_tscore_reasoning_ps"
rownames(spins_demo) <- lol_demo_match$subject

spins_demo %>%
  group_by(diagnostic_group) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE) %>% t
##                                  [,1]       [,2]      
## diagnostic_group                 "case"     "control" 
## demo_age_study_entry             "31.41532" "31.94767"
## scog_rmet_total                  "24.56855" "27.59649"
## scog_er40_total                  "31.83539" "33.54651"
## scog_tasit1_total                "22.50000" "24.63953"
## scog_tasit2_total                "47.52823" "54.47093"
## scog_tasit3_total                "48.35102" "54.71512"
## np_composite_tscore              "35.42387" "49.57059"
## np_domain_tscore_att_vigilance   "39.49794" "47.64912"
## np_domain_tscore_process_speed   "39.69355" "53.06395"
## np_domain_tscore_work_mem        "41.27016" "49.15698"
## np_domain_tscore_verbal_learning "40.66532" "50.30233"
## np_domain_tscore_visual_learning "38.72984" "48.37791"
## np_domain_tscore_reasoning_ps    "42.91129" "48.75581"
spins_demo %>%
  group_by(diagnostic_group) %>%
  summarize_if(is.numeric, sd, na.rm = TRUE) %>% t
##                                  [,1]        [,2]       
## diagnostic_group                 "case"      "control"  
## demo_age_study_entry             " 9.768209" "10.395267"
## scog_rmet_total                  "5.258051"  "3.821886" 
## scog_er40_total                  "4.549732"  "3.319822" 
## scog_tasit1_total                "3.640750"  "2.135267" 
## scog_tasit2_total                "8.526169"  "4.228042" 
## scog_tasit3_total                "7.271608"  "5.264379" 
## np_composite_tscore              "12.93041"  "11.01147" 
## np_domain_tscore_att_vigilance   "11.65920"  "12.71612" 
## np_domain_tscore_process_speed   "13.16429"  "10.09612" 
## np_domain_tscore_work_mem        "11.19371"  "11.36136" 
## np_domain_tscore_verbal_learning "8.937756"  "9.438716" 
## np_domain_tscore_visual_learning "12.45736"  "10.06134" 
## np_domain_tscore_reasoning_ps    "10.97108"  " 9.54391"
cbind(table(spins_demo$diagnostic_group, spins_demo$demo_sex), table(spins_demo$diagnostic_group))
##         female male    
## case        79  169 248
## control     80   92 172

Regress out the effects

table(regout.dat$demo_sex_num)
## 
##   0   1 
## 159 261
behav.reg <- apply(behav.dat, 2, function(x) lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)$residual)

grad.reg <- apply(grad.dat, 2, function(x) lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)$residual)

grad.reg2plot <- apply(grad.dat, 2, function(x){
  model <- lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)
  return(model$residual + model$coefficient[1])
} )

grab some network colours

networks <- read_delim("../networks.txt", 
                       "\t", escape_double = FALSE, trim_ws = TRUE) %>%
  select(NETWORK, NETWORKKEY, RED, GREEN, BLUE, ALPHA) %>%
  distinct() %>%
  add_row(NETWORK = "Subcortical", NETWORKKEY = 13, RED = 0, GREEN=0, BLUE=0, ALPHA=255) %>%
  mutate(hex = rgb(RED, GREEN, BLUE, maxColorValue = 255)) %>%
  arrange(NETWORKKEY)
## Rows: 718 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (4): LABEL, HEMISPHERE, NETWORK, GLASSERLABELNAME
## dbl (8): INDEX, KEYVALUE, RED, GREEN, BLUE, ALPHA, NETWORKKEY, NETWORKSORTED...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
networks$hex <- darken(networks$hex, 0.2)

# oi <- networks$hex
# swatchplot(
#   "-40%" = lighten(oi, 0.4),
#   "-20%" = lighten(oi, 0.2),
#   "  0%" = oi,
#   " 20%" =  darken(oi, 0.2),
#   " 25%" =  darken(oi, 0.25),
#   " 30%" =  darken(oi, 0.3),
#   " 35%" =  darken(oi, 0.35),
#   off = c(0, 0)
# )

# networks

get row and column designs

Run PLS-C

pls.res <- tepPLS(behav.reg, grad.reg, center2 = FALSE, scale2 = FALSE, DESIGN = sub.dx$diagnostic_group, make_design_nominal = TRUE, graphs = FALSE)

pls.boot <- data4PCCAR::Boot4PLSC(behav.reg, grad.reg, scale1 = TRUE, center2 = FALSE, scale2 = FALSE, nIter = 1000, nf2keep = 4)
## Registered S3 method overwritten by 'data4PCCAR':
##   method                  from     
##   print.str_colorsOfMusic PTCA4CATA
# ## swith direction for dimension 3
pls.res$TExPosition.Data$fi[,1] <- pls.res$TExPosition.Data$fi[,1]*-1
pls.res$TExPosition.Data$fj[,1] <- pls.res$TExPosition.Data$fj[,1]*-1
pls.res$TExPosition.Data$pdq$p[,1] <- pls.res$TExPosition.Data$pdq$p[,1]*-1
pls.res$TExPosition.Data$pdq$q[,1] <- pls.res$TExPosition.Data$pdq$q[,1]*-1
pls.res$TExPosition.Data$lx[,1] <- pls.res$TExPosition.Data$lx[,1]*-1
pls.res$TExPosition.Data$ly[,1] <- pls.res$TExPosition.Data$ly[,1]*-1

## Scree plot
PlotScree(pls.res$TExPosition.Data$eigs)

## Print singular values
pls.res$TExPosition.Data$pdq$Dv
##  [1] 53.836609 15.734772 14.187177 11.529298 11.442135 10.418114  9.348353
##  [8]  8.914484  8.304195  7.637942  7.131881  6.849054  6.265523  5.693384
## Print eigenvalues
pls.res$TExPosition.Data$eigs
##  [1] 2898.38047  247.58304  201.27598  132.92471  130.92245  108.53709
##  [7]   87.39170   79.46803   68.95966   58.33815   50.86372   46.90954
## [13]   39.25678   32.41463
pls.res$TExPosition.Data$t
##  [1] 69.2857737  5.9184715  4.8115016  3.1775647  3.1297007  2.5945788
##  [7]  2.0890982  1.8996828  1.6484804  1.3945734  1.2158971  1.1213724
## [13]  0.9384331  0.7748715
## Compare the inertia to the largest possible inertia
sum(cor(behav.dat, grad.dat)^2)
## [1] 81.59259
sum(cor(behav.dat, grad.dat)^2)/(ncol(behav.dat)*ncol(grad.dat))
## [1] 0.004955818

Here, we show that the effect that PLSC decomposes is pretty small to begin with. The effect size of the correlation between the two tables is 92.40 which accounts for 0.0065 of the largest possible effect.

Results

Dimension 1

lxly.out[[1]]

gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

PrettyBarPlot2(pls.res$TExPosition.Data$fi[,1],
               threshold = 0, 
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,1] == TRUE, behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
cor.heat <- pls.res$TExPosition.Data$X %>% heatmap(col = col.heat)

## control
grad.dat.ctrl <- grad.dat[sub.dx$diagnostic_group == "control",]
behav.dat.ctrl <- behav.dat[sub.dx$diagnostic_group == "control",]
corX.ctrl <- cor(as.matrix(behav.dat.ctrl),as.matrix(grad.dat.ctrl))
heatmap(corX.ctrl[cor.heat$rowInd, cor.heat$colInd], col = col.heat, Rowv = NA, Colv = NA)

## case
grad.dat.case <- grad.dat[sub.dx$diagnostic_group == "case",]
behav.dat.case <- behav.dat[sub.dx$diagnostic_group == "case",]
corX.case <- cor(as.matrix(behav.dat.case),as.matrix(grad.dat.case))
heatmap(corX.case[cor.heat$rowInd, cor.heat$colInd], col = col.heat, Rowv = NA, Colv = NA)

Dimension 2

lxly.out[[2]]

gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

PrettyBarPlot2(pls.res$TExPosition.Data$fi[,2],
               threshold = 0, color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,2] == TRUE, behav.dx$type.col, "grey90"), 
               horizontal = FALSE, main = "Scores - behavioural")

dim1.est <- pls.res$TExPosition.Data$pdq$Dv[1]*as.matrix(pls.res$TExPosition.Data$pdq$p[,1], ncol = 1) %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1], ncol = 1))


cor.heat.res1 <- (pls.res$TExPosition.Data$X - dim1.est) %>% heatmap(col = col.heat)

Dimension 3

lxly.out[[3]]

gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

PrettyBarPlot2(pls.res$TExPosition.Data$fi[,3],
               threshold = 0, color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,3] == TRUE, behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")

dim2.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:2]) %*% pls.res$TExPosition.Data$pdq$Dd[1:2,1:2] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:2])))


cor.heat.res2 <- heatmap(pls.res$TExPosition.Data$X - dim2.est, col = col.heat)

Dimension 4

lxly.out[[4]]

gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

PrettyBarPlot2(pls.res$TExPosition.Data$fi[,4],
               threshold = 0, 
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,4] == TRUE, behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")


dim3.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:3]) %*% pls.res$TExPosition.Data$pdq$Dd[1:3,1:3] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:3])))


cor.heat.res3 <- heatmap(pls.res$TExPosition.Data$X - dim3.est, col = col.heat)

Back into the brain

Dimension 1

## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 2

## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 3

## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 4

## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'

Fancy figures

Cohen’s

## plot case
case_roi_results <- spins_mean_bygrp[,1:5] %>%
  pivot_longer(grad1.case:grad3.case, names_to = "gradient", values_to = "grad_value") %>%
  mutate(roi = str_remove(ROI, "_ROI"),
         label = case_when(str_starts(ROI, "L") ~ str_c("lh_", roi),
                           str_starts(ROI, "R_") ~ str_c("rh_", roi))) %>%
  filter(Network != "Subcortical") %>%
  filter(ROI != "L_10pp_ROI") %>%
  as.data.frame() %>%
  group_by(gradient) %>%
  ggplot() +
  geom_brain(mapping = aes(fill = grad_value),
        atlas = glasser) +
  facet_wrap(~gradient, ncol = 1, labeller = labeller(gradient = 
                                                        c("grad1.case" = "Gradient 1",
                                                          "grad2.case" = "Gradient 2",
                                                          "grad3.case" = "Gradient 3")
  )) +
  scale_fill_distiller(name = "Scores", palette = "BrBG", limits = c(-1.3,1.5), values = c(0, 0.286, 0.464, 0.643, 1)) +
  ggtitle("SSD")+  
  theme(axis.text.y.left = element_blank(), 
        axis.text.x.bottom = element_blank()) + 
  theme_brain(text.family = "Calibri")

## plot control
control_roi_results <- spins_mean_bygrp[,c(1:2, 6:8)] %>%
  pivot_longer(grad1.control:grad3.control, names_to = "gradient", values_to = "grad_value") %>%
  mutate(roi = str_remove(ROI, "_ROI"),
         label = case_when(str_starts(ROI, "L") ~ str_c("lh_", roi),
                           str_starts(ROI, "R_") ~ str_c("rh_", roi))) %>%
  filter(Network != "Subcortical") %>%
  filter(ROI != "L_10pp_ROI") %>%
  as.data.frame() %>%
  group_by(gradient) %>%
  ggplot() +
  geom_brain(mapping = aes(fill = grad_value),
        atlas = glasser) +
  facet_wrap(~gradient, ncol = 1, labeller = labeller(gradient = 
                                                        c("grad1.control" = "Gradient 1",
                                                          "grad2.control" = "Gradient 2",
                                                          "grad3.control" = "Gradient 3")
  )) +
  scale_fill_distiller(name = "Scores", palette = "BrBG", limits = c(-1.3,1.5), values = c(0, 0.286, 0.464, 0.643, 1)) +
  ggtitle("Controls")+  
  theme(axis.text.y.left = element_blank(), 
        axis.text.x.bottom = element_blank()) + 
  theme_brain(text.family = "Calibri")

## compute the difference with GLM
all_roi_results <- spins_grad_everything %>%
  pivot_longer(grad1:grad3, names_to = "gradient", values_to = "grad_value") %>%
    ungroup() %>%
  group_by(Network, gradient, ROI) %>%
  do(tidy(lm(grad_value ~ diagnostic_group, data = .))) %>%
  ungroup() %>%
  group_by(term) %>%
  mutate(p_FDR = p.adjust(p.value, method = "fdr"))

## plot the F statistics
all_roi_results %>%
  filter(term == "diagnostic_groupcontrol") %>%
  mutate(roi = str_remove(ROI, "_ROI"),
         label = case_when(str_starts(ROI, "L") ~ str_c("lh_", roi),
                           str_starts(ROI, "R_") ~ str_c("rh_", roi))) %>%
  filter(Network != "Subcortical") %>%
  filter(ROI != "L_10pp_ROI") %>%
  as.data.frame() %>%
  group_by(gradient) %>%
  ggplot() +
  geom_brain(mapping = aes(fill = statistic),
        atlas = glasser) +
  facet_wrap(~gradient, ncol = 1, labeller = labeller(gradient = 
                                                        c("grad1" = "Gradient 1",
                                                          "grad2" = "Gradient 2",
                                                          "grad3" = "Gradient 3")
  )) +
  scale_fill_distiller(name = "F value", palette = "RdBu", limits = c(-7,7), values = c(0, 0.25, 0.5, 0.75, 1)) +
  ggtitle("Gradients: Controls > SSD")+
  theme(axis.text.y.left = element_blank(), 
        axis.text.x.bottom = element_blank()) + 
  theme_brain(text.family = "Calibri")
## merging atlas and data by 'label'
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

3D plot of the gradients

Group difference

Dimension 1

We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.

Dimension 2

We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.

Dimension 3

We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.

Relationship to symptoms

Correlation to composite scores

## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string

Correlation to individual scores